/******************************************************************
Creates dataset for analyzing establishment-level outcomes (all worker spells)
		// input: quitsnewhires_full
		// output: estlevel_dataset_full

*******************************************************************/

* Start a fresh log for this run (a log left open by an earlier run is closed first)
capture log close
capture log using "$logs/create_estlevel_dataset_full", replace

* Read the worker-spell file and retain only the variables used below
use "$files/quitsnewhires_full.dta", clear
local estabvars  fakeid_estab year treat ind_mode municipality_mode
local basevars   bl_singest bl_bothFM bl_signing bl_covered contract_id union_id
local workervars gender age educ empmonths emp1231
local payvars    earningsavgnom contracthours
keep `estabvars' `basevars' `workervars' `payvars'

* Working names used in the rest of the script
rename (empmonths earningsavgnom) (ten earningsavgnom1)

********************************************************
**Groupings
*********************************************************

* Subgroups
* 0/1 indicators, one per worker-year observation. Stored as byte to keep the
* worker-level file small; the values are unchanged.
* Tenure-based flags use !missing(ten): numeric missing values (., .a-.z) compare
* greater than every number, so "ten>12" guarded only by "ten!=." would flag
* observations with extended missing tenure (.a-.z) as stable workers.

// all worker-year observations
gen byte tot = 1
// child-bearing age workers (age 20-35; missing age gives 0)
gen byte cbage = inrange(age, 20, 35)
// new workers (hired in last 12 months)
gen byte neww = (ten <= 12) & !missing(ten)
// probationary workers (tenure <=3 months)
gen byte probw1 = (ten <= 3) & !missing(ten)
// separated workers (main spell at t+1 is not the same as at t)
gen byte sepw = (emp1231 == 0)
// stable workers (tenure above 12 months, tenure observed)
gen byte oldw = (ten > 12) & !missing(ten)

* Subgroups by gender (_fem: gender==2, _mal: gender==1)
foreach grp in tot cbage neww sepw probw1 oldw {
	gen byte `grp'_fem = (`grp' == 1) & (gender == 2)
	gen byte `grp'_mal = (`grp' == 1) & (gender == 1)
}

*****************************************************
** Outcomes
*****************************************************

* Real wage in levels (collapsed to a mean later)
// take the average nominal wage of the main year-spell and deflate it with the
// December CPI (base year = 2015); the k-th entry of cpi_dec belongs to year
// 2010+k, and observations from any other year keep wage missing
local cpi_dec "78.9699590730652 83.5814316074726 88.5216652900615 94.1945931899808 104.246981522556 110.802019433334 114.067821777584"
gen wage = .
local pos = 0
foreach yr of numlist 2011/2017 {
	local ++pos
	local deflator : word `pos' of `cpi_dec'
	replace wage = 100*(earningsavgnom1/`deflator') if year==`yr'
}
// copy of the real wage that is summed into the wage bill later
gen wbill = wage

* Real wage in logs (collapsed to a mean later)
// log() is the natural logarithm, same function as ln()
gen lnwage = log(wage)

* Age (mean)
// no changes needed to age variable

* Tenure (mean)
// no changes needed to ten variable

* Contracted hours (mean)
// no changes needed to contracthours variable

* Schooling (mean)
// translate the education category (educ = 1,...,11) into years of schooling;
// the k-th entry of school_years is assigned to educ==k, and any other educ
// value leaves schooling missing
local school_years "0 2 4 6 8 9 11 13 15 17 19"
gen schooling = .
forvalues level = 1/11 {
	local yrs : word `level' of `school_years'
	replace schooling = `yrs' if educ==`level'
}


*****************************************************
** Interact outcomes with subgroups
*****************************************************	

* For each outcome, list the subgroup indicators it is interacted with.
* Every pattern (e.g. tot*) picks up the pooled indicator and its _fem/_mal versions.
local spec_wage          "tot* cbage* neww* probw1* sepw*"
local spec_lnwage        "tot* cbage* neww* probw1* sepw* oldw*"
local spec_wbill         "tot* oldw*"
local spec_age           "tot* neww* probw1* sepw*"
local spec_ten           "tot*"
local spec_contracthours "tot*"
local spec_schooling     "tot* neww* sepw* probw1*"

* outcome_group equals the outcome for members of the group and is missing otherwise
foreach outcome in wage lnwage wbill age ten contracthours schooling {
	foreach grp of varlist `spec_`outcome'' {
		gen `outcome'_`grp' = `outcome' if `grp'==1
	}
}


***********************************************************************
* Estab-year dataset
***********************************************************************

* Variable lists shared by the keep and collapse steps
local estab_chars treat ind_mode municipality_mode bl_singest bl_bothFM bl_signing bl_covered contract_id union_id
local headcounts  tot* cbage* neww* sepw* probw1* oldw*
local averages    wage_* lnwage_* age_* ten_* contracthours_* schooling_*
local totals      wbill_*

* Drop everything that does not enter the collapse
keep fakeid_estab year `estab_chars' `headcounts' `averages' `totals'

* One row per establishment-year:
*   first value of the establishment characteristics,
*   sums of the 0/1 subgroup indicators (headcounts) and of the wage bill,
*   means of the interacted outcomes
collapse (first) `estab_chars' ///
		 (sum) `headcounts' ///
		 (mean) `averages' ///
		 (sum) `totals' ///
		 , by(fakeid_estab year) fast


***********************************************
* Fill in the panel
***********************************************

* Declare the establishment-year panel and add a row for every establishment in
* every year spanned by the data (variables in the added rows start out missing)
xtset fakeid_estab year
tsfill, full

* exit = 1 for establishment-years added by tsfill, i.e. rows with no headcount.
* With the full option this presumably also covers years before an establishment
* first appears, not only years after it disappears - confirm this is intended.
generate exit = missing(tot)

* Copy establishment-level characteristics onto every row, including added ones.
* NOTE(review): headcount and wage-bill variables are NOT set to zero in the
* added rows; they stay missing. Confirm whether a zero fill is intended.
tempvar fill
foreach v of varlist treat ind_mode municipality_mode bl_singest bl_bothFM bl_signing bl_covered {
	// largest non-missing value observed within the establishment
	gegen `fill' = max(`v'), by(fakeid_estab)
	replace `v' = `fill'
	drop `fill'
}
foreach v of varlist contract_id union_id {
	// most frequent value within the establishment; minmode takes the smallest on ties
	gegen `fill' = mode(`v'), by(fakeid_estab) minmode
	replace `v' = `fill'
	drop `fill'
}

* Write the establishment-year file and close the log
save "$files/estlevel_dataset_full.dta", replace

capture log close
